This document is a practice analysis on Ziesel dataset

Data Import

Import dataset

# Import Ziesel dataset
dat <- read.csv("Zeisel_preprocessed.csv", row.names = 1)
cell_type <- read.table("Zeisel_cell_info.txt", sep = "\t", header = 1)

# Get the labels for each cell
cluster_labels <- as.numeric(as.factor(cell_type$level1class))
cell_labels <- unique(cell_type$level1class)
rand_ind <- c()

for (cell in cell_labels){
  set.seed(10)
  subcell_ind <- which(cell_type$level1class == cell)
  sub_rand <- sample(length(subcell_ind),
                     length(subcell_ind)/10)
  rand_ind <- c(rand_ind, subcell_ind[sub_rand])
}

dat_hclust <- hclust(dist(t(dat)))
dat_index <- dat_hclust$order


sub_dat <- dat[rand_ind, dat_index]

sub_celltype <- cell_type[rand_ind, ]
sub_cluster_labels <- as.numeric(as.factor(sub_celltype$level1class))

Dependence Measures

1. Pearson’s correlation coefficient

  • it measures the linear dependence.
  • the runtime is very short compared to other methods.
cor_pearson_mat <- stats::cor(sub_dat, method = "pearson")

cor_pearson_mat[upper.tri(cor_pearson_mat, diag = T)] <- NA
cor_pearson_mat[1:5,1:5]
##            Zwint      Bex4    Ndfip1     Mgst3 Atp5g1
## Zwint         NA        NA        NA        NA     NA
## Bex4   0.9546517        NA        NA        NA     NA
## Ndfip1 0.9115323 0.9141624        NA        NA     NA
## Mgst3  0.8603807 0.7934906 0.7425758        NA     NA
## Atp5g1 0.8734328 0.8337454 0.8192167 0.9097877     NA
# plot the smallest correlations
cor_pearson_vec <- sort(abs(cor_pearson_mat), decreasing = T)
plot(cor_pearson_vec)

#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
 idx <- which(abs(cor_pearson_mat) == cor_pearson_vec[i], arr.ind = T)
 idx1 <- idx[1]; idx2 <- idx[2]
 
 plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(cor_pearson_mat[idx1, idx2], 3)))
}

#plot the lowest correlations
par(mfrow = c(2,2))
for(i in 1:4){
 idx <- which(abs(cor_pearson_mat) == rev(cor_pearson_vec)[i], arr.ind = T)
 idx1 <- idx[1]; idx2 <- idx[2]
 
 plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(cor_pearson_mat[idx1, idx2], 3)))
}

Pearson_Heatmap

Pearson_Heatmap

Abs_Pearson_Heatmap

Abs_Pearson_Heatmap

2. Spearman’s correlation coefficient

  • captures monotonous relationship within data.
  • the runtime is very short compared to other methods.
cor_spearman_mat <- stats::cor(sub_dat, method = "spearman")

cor_spearman_mat[upper.tri(cor_spearman_mat, diag = T)] <- NA
cor_spearman_mat[1:5,1:5]
##            Zwint      Bex4    Ndfip1     Mgst3 Atp5g1
## Zwint         NA        NA        NA        NA     NA
## Bex4   0.9352655        NA        NA        NA     NA
## Ndfip1 0.9007310 0.8963630        NA        NA     NA
## Mgst3  0.8367449 0.7895434 0.7233568        NA     NA
## Atp5g1 0.8534318 0.8121204 0.8014422 0.8833877     NA
# plot the smallest correlations
cor_spearman_vec <- sort(abs(cor_spearman_mat), decreasing = T)
plot(cor_spearman_vec)

#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
 idx <- which(abs(cor_spearman_mat) == cor_spearman_vec[i], arr.ind = T)
 idx1 <- idx[1]; idx2 <- idx[2]
 
 plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(cor_spearman_mat[idx1, idx2], 3)))
}

#plot the lowest correlations
par(mfrow = c(2,2))
for(i in 1:4){
 idx <- which(abs(cor_spearman_mat) == rev(cor_spearman_vec)[i], arr.ind = T)
 idx1 <- idx[1]; idx2 <- idx[2]
 
 plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(cor_spearman_mat[idx1, idx2], 3)))
}

Spearman_Heatmap

Spearman_Heatmap

Abs_Pearson_Heatmap

Abs_Pearson_Heatmap

3. Kendall’s correlation coefficient, τ

  • alternative method to Spearman’s correlations, identifying monotonic relationships.
  • it takes more time than Pearson and Spearman do, so I decide to apply this method on the subset of dataset of which the size is 300 rows.
#cor_kendall_mat <- stats::cor(sub_dat, method = "kendall")

#cor_kendall_mat[upper.tri(cor_kendall_mat, diag = T)] <- NA
#cor_kendall_mat[1:5,1:5]
# plot the smallest correlations
#cor_kendall_vec <- sort(abs(cor_kendall_mat), decreasing = T)
#plot(cor_kendall_vec)
#plot the high correlations
#par(mfrow = c(2,2))
#for(i in 1:4){
# idx <- which(abs(cor_kendall_mat) == cor_kendall_vec, arr.ind = T)
# idx1 <- idx[i,1]; idx2 <- idx[i,2]
 
# plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
#      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
#      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
#      main = paste0("Correlation of ", round(cor_kendall_mat[idx1, idx2], 3)))
#}
#plot the lowest correlations
#par(mfrow = c(2,2))
#for(i in 1:4){
# idx <- which(abs(cor_kendall_mat) == rev(cor_kendall_vec), arr.ind = T)
# idx1 <- idx[i,1]; idx2 <- idx[i,2]
 
# plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
#      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
#      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
#      main = paste0("Correlation of ", round(cor_kendall_mat[idx1, idx2], 3)))
#}

3-1. Faster Kendall’s tau (pcaPP)

library(pcaPP)

faster_kendall_mat <- cor.fk(sub_dat)

faster_kendall_mat[upper.tri(faster_kendall_mat, diag = T)] <- NA
faster_kendall_mat[1:5,1:5]
##            Zwint      Bex4    Ndfip1     Mgst3 Atp5g1
## Zwint         NA        NA        NA        NA     NA
## Bex4   0.7870143        NA        NA        NA     NA
## Ndfip1 0.7274092 0.7295477        NA        NA     NA
## Mgst3  0.6381381 0.5793976 0.5358995        NA     NA
## Atp5g1 0.6610702 0.6017836 0.5977796 0.7145327     NA
# plot the smallest correlations
faster_kendall_vec <- sort(abs(faster_kendall_mat), decreasing = T)
plot(faster_kendall_vec)

#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
 idx <- which(abs(faster_kendall_mat) == faster_kendall_vec, arr.ind = T)
 idx1 <- idx[i,1]; idx2 <- idx[i,2]
 
 plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(faster_kendall_mat[idx1, idx2], 3)))
}
## Warning in abs(faster_kendall_mat) == faster_kendall_vec: longer object length
## is not a multiple of shorter object length

## Warning in abs(faster_kendall_mat) == faster_kendall_vec: longer object length
## is not a multiple of shorter object length

## Warning in abs(faster_kendall_mat) == faster_kendall_vec: longer object length
## is not a multiple of shorter object length

## Warning in abs(faster_kendall_mat) == faster_kendall_vec: longer object length
## is not a multiple of shorter object length

#plot the lowest correlations
par(mfrow = c(2,2))
for(i in 1:4){
 idx <- which(abs(faster_kendall_mat) == rev(faster_kendall_vec), arr.ind = T)
 idx1 <- idx[i,1]; idx2 <- idx[i,2]
 
 plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(faster_kendall_mat[idx1, idx2], 3)))
}
## Warning in abs(faster_kendall_mat) == rev(faster_kendall_vec): longer object
## length is not a multiple of shorter object length

## Warning in abs(faster_kendall_mat) == rev(faster_kendall_vec): longer object
## length is not a multiple of shorter object length

## Warning in abs(faster_kendall_mat) == rev(faster_kendall_vec): longer object
## length is not a multiple of shorter object length

## Warning in abs(faster_kendall_mat) == rev(faster_kendall_vec): longer object
## length is not a multiple of shorter object length

Kendall_Heatmap

Kendall_Heatmap

Abs_Kendall_Heatmap

Abs_Kendall_Heatmap

4. Distance correlation

  • it is a fully non-parametric measure that identifies non-linear relationships between two random variables with energy distances
library(energy)

cor_dist_mat <- matrix(nrow = ncol(sub_dat), ncol = ncol(sub_dat))

for (i in 2:ncol(sub_dat)){
  for (j in 1:(i-1)){
    cor_dist_mat[i,j] <- dcor2d(as.numeric(sub_dat[, i]), as.numeric(sub_dat[, j]))
  }
}

cor_dist_mat[upper.tri(cor_dist_mat, diag = T)] <- NA
cor_dist_mat[1:5,1:5]
##           [,1]      [,2]      [,3]      [,4] [,5]
## [1,]        NA        NA        NA        NA   NA
## [2,] 0.9018565        NA        NA        NA   NA
## [3,] 0.8064139 0.8115195        NA        NA   NA
## [4,] 0.7639750 0.6665600 0.5744216        NA   NA
## [5,] 0.7853308 0.7157073 0.6575505 0.8330981   NA
# plot the smallest correlations
cor_dist_vec <- sort(abs(cor_dist_mat), decreasing = T)
plot(cor_dist_vec)

#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
 idx <- which(abs(cor_dist_mat) == cor_dist_vec[i], arr.ind = T)
 idx1 <- idx[1]; idx2 <- idx[2]
 
 plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(cor_dist_mat[idx1, idx2], 3)))
}

#plot the lowest correlations
par(mfrow = c(2,2))
for(i in 1:4){
 idx <- which(abs(cor_dist_mat) == rev(cor_dist_vec)[i], arr.ind = T)
 idx1 <- idx[1]; idx2 <- idx[2]
 
 plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(cor_dist_mat[idx1, idx2], 3)))
}

DistanceCorr_Heatmap

DistanceCorr_Heatmap

5. Hoeffding’s D measure

  • tests the independence of the data sets by calculating the distance between the product of the marginal distributions
library(Hmisc)

hoeff_dist <- hoeffd(x = as.matrix(sub_dat))

cor_hoeffd_mat <- hoeff_dist$D

cor_hoeffd_mat[upper.tri(cor_hoeffd_mat, diag = T)] <- NA
# plot the smallest correlations
cor_hoeff_vec <- sort(abs(cor_hoeffd_mat), decreasing = T)
plot(cor_hoeff_vec)

#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
 idx <- which(abs(cor_hoeffd_mat) == (cor_hoeff_vec)[i], arr.ind = T)
 idx1 <- idx[1]; idx2 <- idx[2]
 
 plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(cor_hoeffd_mat[idx1, idx2], 3)))
}

#plot the lowest correlations
par(mfrow = c(2,2))
for(i in 1:4){
 idx <- which(abs(cor_hoeffd_mat) == rev(cor_hoeff_vec)[i], arr.ind = T)
 idx1 <- idx[1,1]; idx2 <- idx[1,2]
 
 plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(cor_hoeffd_mat[idx1, idx2], 3)))
}

Hoeffding's_Heatmap

Hoeffding’s_Heatmap

Abs_Hoeffding's_Heatmap

Abs_Hoeffding’s_Heatmap

6. Mutual information (MI)

  • measures how much one random variable tells us about another.
library(entropy)

cor_MI_mat <- matrix(nrow = ncol(sub_dat), ncol = ncol(sub_dat))

for (i in 2:ncol(sub_dat)){
  for (j in 1:(i-1)){
    y2d <- discretize2d(as.matrix(sub_dat[, i]),
                                   as.matrix(sub_dat[, j]),
                                   numBins1 = 20,
                                   numBins2 = 20)
    cor_MI_mat[i,j] <- as.numeric(mi.empirical(y2d))
  }
}
# plot the smallest correlations
cor_MI_vec <- sort(abs(cor_MI_mat), decreasing = T)
plot(cor_MI_vec)

#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
 idx <- which(abs(cor_MI_mat) == (cor_MI_vec)[i], arr.ind = T)
 idx1 <- idx[1]; idx2 <- idx[1,2]
 
 plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(cor_MI_mat[idx1, idx2], 3)))
}

#plot the lowest correlations
par(mfrow = c(2,2))
for(i in 1:4){
 idx <- which(abs(cor_MI_mat) == rev(cor_MI_vec)[i], arr.ind = T)
 idx1 <- idx[1]; idx2 <- idx[1,2]
 
 plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(cor_MI_mat[idx1, idx2], 3)))
}

MutualInfo_Heatmap

MutualInfo_Heatmap

7. Maximum Information Coefficient (MIC)

library(minerva)

cor_MIC <- mine(sub_dat)
cor_MIC_mat <- cor_MIC$MIC

cor_MIC_mat[upper.tri(cor_MIC_mat, diag = T)] <- NA
cor_MIC_vec <- sort(abs(cor_MIC_mat), decreasing = T)
plot(cor_MIC_vec)

#plot the high correlations
par(mfrow = c(2,2))

for(i in 1:4){
  idx <- which(abs(cor_MIC_mat) == (cor_MIC_vec)[i], arr.ind = T)
  idx1 <- idx[i, 1]; idx2 <- idx[i,2]
  plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(cor_MIC_mat[idx1, idx2], 3)))
}

#plot the lowest correlations
par(mfrow = c(2,2))

for(i in 1:4){
 idx <- which(abs(cor_MIC_mat) == rev(cor_MIC_vec)[i], arr.ind = T)
 idx1 <- idx[1]; idx2 <- idx[2]
 plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(cor_MIC_mat[idx1, idx2], 3)))
}

MIC_Heatmap

MIC_Heatmap

8. Chatterjee’s method

library(XICOR)

cor_XI_mat <- matrix(nrow = ncol(sub_dat), ncol = ncol(sub_dat))

for (i in 2:ncol(sub_dat)){
  for (j in 1:(i-1)){
    cor_XI_mat[i,j] <- calculateXI(as.numeric(sub_dat[, i]), as.numeric(sub_dat[, j]))
  }
}

cor_XI_mat[upper.tri(cor_XI_mat, diag = T)] <- NA
cor_XI_vec <- sort(abs(cor_XI_mat), decreasing = T)
plot(cor_XI_vec)

#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
  idx <- which(abs(cor_XI_mat) == (cor_XI_vec)[i], arr.ind = T)
  idx1 <- idx[1]; idx2 <- idx[2]
  plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(cor_XI_mat[idx1, idx2], 3)))
}

#plot the lowest correlations
par(mfrow = c(2,2))

for(i in 1:4){
 idx <- which(abs(cor_XI_mat) == rev(cor_XI_vec)[i], arr.ind = T)
 idx1 <- idx[i, 1]; idx2 <- idx[i, 2]
 plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(cor_XI_mat[idx1, idx2], 3)))
}

XI_Heatmap

XI_Heatmap

9. Hilbert Schmidt Independence Criterion (HSIC)

library(dHSIC)

cor_HSIC_mat <- matrix(nrow = ncol(sub_dat), ncol = ncol(sub_dat))

for (i in 2:ncol(sub_dat)){
  for (j in 1:(i-1)){
    cor_HSIC_mat[i,j] <- dhsic(as.numeric(sub_dat[, i]),
                               as.numeric(sub_dat[, j]))$dHSIC
  }
}

cor_HSIC_mat[upper.tri(cor_HSIC_mat, diag = T)] <- NA
cor_HSIC_vec <- sort(abs(cor_HSIC_mat), decreasing = T)
plot(cor_HSIC_vec)

#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
  idx <- which(abs(cor_HSIC_mat) == (cor_HSIC_vec)[i], arr.ind = T)
  idx1 <- idx[1]; idx2 <- idx[2]
  plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(cor_HSIC_mat[idx1, idx2], 3)))
}

#plot the lowest correlations
par(mfrow = c(2,2))

for(i in 1:4){
 idx <- which(abs(cor_HSIC_mat) == rev(cor_HSIC_vec)[i], arr.ind = T)
 idx1 <- idx[1]; idx2 <- idx[2]
 plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(cor_HSIC_mat[idx1, idx2], 3)))
}

Pearson_Heatmap

Pearson_Heatmap

10. Blomqvist’s Beta

library(VineCopula)

cor_blomqvist_mat <- BetaMatrix(sub_dat)

cor_blomqvist_mat[upper.tri(cor_blomqvist_mat, diag = T)] <- NA
cor_blomqvist_mat[1:5,1:5]
##           [,1]      [,2]      [,3]      [,4] [,5]
## [1,]        NA        NA        NA        NA   NA
## [2,] 0.8720539        NA        NA        NA   NA
## [3,] 0.7912458 0.8383838        NA        NA   NA
## [4,] 0.7643098 0.6902357 0.7037037        NA   NA
## [5,] 0.7979798 0.7373737 0.7104377 0.8451178   NA
cor_blomqvist_vec <- sort(abs(cor_blomqvist_mat), decreasing = T)
plot(cor_blomqvist_vec)

#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
  idx <- which(abs(cor_blomqvist_mat) == (cor_blomqvist_vec)[i], arr.ind = T)
  idx1 <- idx[i, 1]; idx2 <- idx[i, 2]
  plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(cor_blomqvist_mat[idx1, idx2], 3)))
}

#plot the lowest correlations
par(mfrow = c(2,2))

for(i in 1:4){
 idx <- which(abs(cor_blomqvist_mat) == rev(cor_blomqvist_vec)[i], arr.ind = T)
 idx1 <- idx[i, 1]; idx2 <- idx[i, 2]
 plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(cor_blomqvist_mat[idx1, idx2], 3)))
}

Pearson_Heatmap

Pearson_Heatmap

Find the indices that have the contrast measure of dependency.

# low pearson and high spearman (linearity vs monotone)
cor_contrast1 <- (abs(cor_pearson_mat) < 0.3) & (abs(cor_spearman_mat) > 0.7)
cor_contrast_ind1 <- which(cor_contrast1, arr.ind = T)
nrow(cor_contrast_ind1)
## [1] 10
# high pearson and low spearman (linearity vs monotone)
cor_contrast2 <- (abs(cor_pearson_mat) > 0.80) & (abs(cor_spearman_mat) < 0.20)
cor_contrast_ind2 <- which(cor_contrast2, arr.ind = T)
nrow(cor_contrast_ind2)
## [1] 93
# low pearson and high kendall (linearity vs monotone)
cor_contrast3 <- (abs(cor_pearson_mat) < 0.45) & (abs(faster_kendall_mat) > 0.55)
cor_contrast_ind3 <- which(cor_contrast3, arr.ind = T)
nrow(cor_contrast_ind3)
## [1] 14
# high pearson and low kendall (linearity vs monotone)
cor_contrast4 <- (abs(cor_pearson_mat) > 0.85) & (abs(faster_kendall_mat) < 0.15)
cor_contrast_ind4 <- which(cor_contrast4, arr.ind = T)
nrow(cor_contrast_ind4)
## [1] 46
# low pearson and high distance correlation (linearity vs non-linearity)
cor_contrast5 <- (abs(cor_pearson_mat) < 0.45) & (abs(cor_dist_mat) > 0.55)
cor_contrast_ind5 <- which(cor_contrast5, arr.ind = T)
nrow(cor_contrast_ind5)
## [1] 0
# high pearson and low distance correlation (linearity vs non-linearity)
cor_contrast6 <- (abs(cor_pearson_mat) > 0.7) & (abs(cor_dist_mat) < 0.3)
cor_contrast_ind6 <- which(cor_contrast6, arr.ind = T)
nrow(cor_contrast_ind6)
## [1] 47
# low pearson and high MIC (linearity vs Information)
cor_contrast7 <- (abs(cor_pearson_mat) < 0.25) & (abs(cor_MIC_mat) > 0.75)
cor_contrast_ind7 <- which(cor_contrast7, arr.ind = T)
nrow(cor_contrast_ind7)
## [1] 39
# high pearson and low MIC (linearity vs Information)
cor_contrast8 <- (abs(cor_pearson_mat) > 0.75) & (abs(cor_MIC_mat) < 0.25)
cor_contrast_ind8 <- which(cor_contrast8, arr.ind = T)
nrow(cor_contrast_ind8)
## [1] 19
# low pearson and high XI 
cor_contrast9 <- (abs(cor_pearson_mat) < 0.45) & (abs(cor_XI_mat) > 0.55)
cor_contrast_ind9 <- which(cor_contrast9, arr.ind = T)
nrow(cor_contrast_ind9)
## [1] 18
# high pearson and low XI
cor_contrast10 <- (abs(cor_pearson_mat) > 0.85) & (abs(cor_XI_mat) < 0.15)
cor_contrast_ind10 <- which(cor_contrast10, arr.ind = T)
nrow(cor_contrast_ind10)
## [1] 61
# low MIC and high XI
cor_contrast11 <- (abs(cor_MIC_mat) < 0.4) & (abs(cor_XI_mat) > 0.6)
cor_contrast_ind11 <- which(cor_contrast11, arr.ind = T)
nrow(cor_contrast_ind11)
## [1] 0
# high MIC and low XI
cor_contrast12 <- (abs(cor_MIC_mat) > 0.70) & (abs(cor_XI_mat) < 0.30)
cor_contrast_ind12 <- which(cor_contrast12, arr.ind = T)
nrow(cor_contrast_ind12)
## [1] 164
# low spearman and high distance correlation (monotone vs non-linearity)
cor_contrast13 <- (abs(cor_spearman_mat) < 0.2) & (abs(cor_dist_mat) > 0.8)
cor_contrast_ind13 <- which(cor_contrast13, arr.ind = T)
nrow(cor_contrast_ind13)
## [1] 5
# high spearman and low distance correlation (monotone vs non-linearity)
cor_contrast14 <- (abs(cor_spearman_mat) > 0.6) & (abs(cor_dist_mat) < 0.4)
cor_contrast_ind14 <- which(cor_contrast14, arr.ind = T)
nrow(cor_contrast_ind14)
## [1] 55402
# low spearman and high MIC (monotone vs Information)
cor_contrast15 <- (abs(cor_spearman_mat) < 0.2) & (abs(cor_MIC_mat) > 0.8)
cor_contrast_ind15 <- which(cor_contrast15, arr.ind = T)
nrow(cor_contrast_ind15)
## [1] 24
# high spearman and low MIC (monotone vs Information)
cor_contrast16 <- (abs(cor_spearman_mat) > 0.65) & (abs(cor_MIC_mat) < 0.35)
cor_contrast_ind16 <- which(cor_contrast16, arr.ind = T)
nrow(cor_contrast_ind16)
## [1] 0
# low distance correlation and high MIC (non-linearity vs Information)
cor_contrast17 <- (abs(cor_dist_mat) < 0.4) & (abs(cor_MIC_mat) > 0.6)
cor_contrast_ind17 <- which(cor_contrast17, arr.ind = T)
nrow(cor_contrast_ind17)
## [1] 12047
# high distance correlation and low MIC (non-linearity vs Information)
cor_contrast18 <- (abs(cor_dist_mat) > 0.7) & (abs(cor_MIC_mat) < 0.3)
cor_contrast_ind18 <- which(cor_contrast18, arr.ind = T)
nrow(cor_contrast_ind18)
## [1] 0

Visualization of low pearson (<0.30) and high spearman (>0.7) (linearity vs monotone)

par(mfrow = c(2, 5))
for (i in 1:10){
   index1 <- cor_contrast_ind1[i, 1]; index2 <- cor_contrast_ind1[i, 2]
   plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
      ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
      main = paste(paste0("Pearson of ", round(cor_pearson_mat[index1, index2], 3)),
                   "\n",
                   paste0("Spearman of ", round(cor_spearman_mat[index1, index2], 3))))
}

Visualization of high pearson (<0.8) and low spearman (<0.2) (linearity vs monotone)

par(mfrow = c(2, 5))
for (i in 1:10){
   index1 <- cor_contrast_ind2[i, 1]; index2 <- cor_contrast_ind2[i, 2]
   plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
      ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
      main = paste(paste0("Pearson of ", round(cor_pearson_mat[index1, index2], 3)),
                   "\n",
                   paste0("Spearman of ", round(cor_spearman_mat[index1, index2], 3))))
}

Visualization of low pearson (<0.45) and high kendall (>0.55) (linearity vs monotone)

par(mfrow = c(2, 5))
for (i in 1:10){
   index1 <- cor_contrast_ind3[i, 1]; index2 <- cor_contrast_ind3[i, 2]
   plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
      ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
      main = paste(paste0("Pearson of ", round(cor_pearson_mat[index1, index2], 3)),
                   "\n",
                   paste0("Kendall of ", round(faster_kendall_mat[index1, index2], 3))))
}

Visualization of high pearson (<0.85) and low kendall (<0.15) (linearity vs monotone)

par(mfrow = c(2, 5))
for (i in 1:10){
   index1 <- cor_contrast_ind4[i, 1]; index2 <- cor_contrast_ind4[i, 2]
   plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
      ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
      main = paste(paste0("Pearson of ", round(cor_pearson_mat[index1, index2], 3)),
                   "\n",
                   paste0("Kendall of ", round(faster_kendall_mat[index1, index2], 3))))
}

Visualization of high pearson (>0.70) and low distance correlation (<0.30) (linearity vs Non-linearity)

par(mfrow = c(2, 5))

for (i in 1:10){
   index1 <- cor_contrast_ind6[i, 1]; index2 <- cor_contrast_ind6[i, 2]
   plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
      ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
      main = paste(paste0("Pearson of ", round(cor_pearson_mat[index1, index2], 3)),
                   "\n",
                   paste0("Dist.Cor of ", round(cor_dist_mat[index1, index2], 3))))
}

Visualization of low pearson (< 0.45) and high MIC (> 0.55) (linearity vs Information)

par(mfrow = c(2, 5))

for (i in 1:10){
   index1 <- cor_contrast_ind7[i, 1]; index2 <- cor_contrast_ind7[i, 2]
   plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
      ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
      main = paste(paste0("Pearson of ", round(cor_pearson_mat[index1, index2], 3)),
                   "\n",
                   paste0("MIC of ", round(cor_MIC_mat[index1, index2], 3))))
}

Visualization of high pearson (> 0.85) and low MIC (< 0.15) (linearity vs Information)

par(mfrow = c(2,5))

for (i in 1:10){
   index1 <- cor_contrast_ind8[i, 1]; index2 <- cor_contrast_ind8[i, 2]
   plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
      ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
      main = paste(paste0("Pearson of ", round(cor_pearson_mat[index1, index2], 3)),
                   "\n",
                   paste0("MIC of ", round(cor_MIC_mat[index1, index2], 3))))
}

Visualization of low pearson (< 0.45) and high XI (> 0.55)

par(mfrow = c(2, 5))

for (i in 1:10){
   index1 <- cor_contrast_ind9[i, 1]; index2 <- cor_contrast_ind9[i, 2]
   plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
      ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
      main = paste(paste0("Pearson of ", round(cor_pearson_mat[index1, index2], 3)),
                   "\n",
                   paste0("XI of ", round(cor_XI_mat[index1, index2], 3))))
}

Visualization of high pearson (> 0.85) and low XI (< 0.15)

par(mfrow = c(2,5))

for (i in 1:10){
   index1 <- cor_contrast_ind10[i, 1]; index2 <- cor_contrast_ind10[i, 2]
   plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
      ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
      main = paste(paste0("Pearson of ", round(cor_pearson_mat[index1, index2], 3)),
                   "\n",
                   paste0("XI of ", round(cor_XI_mat[index1, index2], 3))))
}

Visualization of high MIC (> 0.70) and low XI (< 0.30)

par(mfrow = c(2,5))

for (i in 1:10){
   index1 <- cor_contrast_ind12[i, 1]; index2 <- cor_contrast_ind12[i, 2]
   plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
      ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
      main = paste(paste0("MIC of ", round(cor_MIC_mat[index1, index2], 3)),
                   "\n",
                   paste0("XI of ", round(cor_XI_mat[index1, index2], 3))))
}

Visualization of low Spearman (< 0.20) and high distance correlation (> 0.80) (monotone vs non-linearity)

par(mfrow = c(2,5))

for (i in 1:5){
   index1 <- cor_contrast_ind13[i, 1]; index2 <- cor_contrast_ind13[i, 2]
   plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
      ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
      main = paste(paste0("Spearman of ", round(cor_spearman_mat[index1, index2], 3)),
                   "\n",
                   paste0("Dist.Cor of ", round(cor_dist_mat[index1, index2], 3))))
}

Visualization of high Spearman (> 0.60) and low distance correlation (< 0.40) (monotone vs non-linearity)

par(mfrow = c(2,5))

for (i in 1:10){
   index1 <- cor_contrast_ind14[i, 1]; index2 <- cor_contrast_ind14[i, 2]
   plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
      ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
      main = paste(paste0("Spearman of ", round(cor_spearman_mat[index1, index2], 3)),
                   "\n",
                   paste0("Dist.Cor of ", round(cor_dist_mat[index1, index2], 3))))
}

Visualization of low Spearman (< 0.2) and high MIC (> 0.80) (monotone vs information)

par(mfrow = c(2,5))

for (i in 1:10){
   index1 <- cor_contrast_ind15[i, 1]; index2 <- cor_contrast_ind15[i, 2]
   plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
      ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
      main = paste(paste0("Spearman of ", round(cor_spearman_mat[index1, index2], 3)),
                   "\n",
                   paste0("MIC of ", round(cor_MIC_mat[index1, index2], 3))))
}

Visualization of low distance correlation (< 0.4) and high MIC (> 0.6) (non-linearity vs information)

par(mfrow = c(2,5))

for (i in 1:10){
   index1 <- cor_contrast_ind17[i, 1]; index2 <- cor_contrast_ind17[i, 2]
   plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
      ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
      main = paste(paste0("Dist.Cor of ", round(cor_spearman_mat[index1, index2], 3)),
                   "\n",
                   paste0("MIC of ", round(cor_MIC_mat[index1, index2], 3))))
}

Extract meaningful indices for celltype_practice

indices <- c(cor_contrast_ind1, cor_contrast_ind2, cor_contrast_ind3,
             cor_contrast_ind4, cor_contrast_ind5, cor_contrast_ind6,
             cor_contrast_ind7, cor_contrast_ind8, cor_contrast_ind9,
             cor_contrast_ind10, cor_contrast_ind11, cor_contrast_ind12,
             cor_contrast_ind13, cor_contrast_ind14, cor_contrast_ind15,
             cor_contrast_ind16, cor_contrast_ind17, cor_contrast_ind18)
indices <- unique(indices)

#save(indices, file = "meaningful_indices.RData")

Store correlation matrices for future usage.

# save(cor_pearson_mat,
#     cor_spearman_mat,
#     faster_kendall_mat,
#     cor_dist_mat,
#     cor_hoeffd_mat,
#     cor_MI_mat,
#     cor_MIC_mat,
#     cor_XI_mat,
#     cor_HSIC_mat,
#     cor_blomqvist_mat,
#     file = "Correlation_data.RData")